SHQ = Sea Hero Quest scores, CHQ = City Hero Quest scores
Importing libraries and dataset
# Importing libraries
import pandas
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import matplotlib.image as pltimg
import pydotplus
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np
## Importing datasets
# The four CSV files are read in the order demographics, CHQ, SHQ, entropy and
# each result is bound to its own module-level frame.
(demographics, CHQ_zscore, SHQ_zscore, entropy) = (
    pandas.read_csv(csv_path)
    for csv_path in (
        r"C:\Users\Arjun\Documents\msc\thesis\dataset\demographics.csv",
        r"C:\Users\Arjun\Documents\msc\thesis\dataset\CHQ_trajectory_lengths.csv",
        r"C:\Users\Arjun\Documents\msc\thesis\dataset\SHQ_trajectory_lengths.csv",
        r"C:\Users\Arjun\Documents\msc\thesis\dataset\HomeAddressEntropy.csv",
    )
)
## Male: entropy vs performance (SHQ)
# Row labels of the male participants in the demographics table.
male_index = demographics.index[demographics['gender'] == "Male"].tolist()
# NOTE(review): the same labels are applied to the SHQ and entropy tables,
# which assumes all three CSVs list participants in the same row order - confirm.
# drop() without inplace returns a new frame, so the row selection is never
# mutated in place and pandas no longer emits SettingWithCopyWarning.
df4 = SHQ_zscore.loc[male_index].drop(columns=['L1', 'L11', 'L32', "L42", "L68"])
df_entro = entropy.loc[male_index].drop(columns=['address', 'environment'])
# Side-by-side join on the shared row labels.
entro_per = pandas.concat([df4, df_entro], axis=1, join='inner')
# Scatter plot of SHQ zscore against adjusted entropy, then the correlation matrix.
fig = px.scatter(entro_per, x="zscore", y="entropy_adjusted")
fig.show()
print(entro_per.corr())
C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy return super().drop(
zscore entropy_adjusted zscore 1.000000 0.081297 entropy_adjusted 0.081297 1.000000
## Female: entropy vs performance (SHQ scores)
# Row labels of the female participants in the demographics table.
# The selection below is restricted to these rows; without the filter the plot
# and the correlation would cover every participant instead of females only.
female_index_shq = demographics.index[demographics['gender'] == "Female"].tolist()
# NOTE(review): assumes the SHQ and entropy CSVs list participants in the same
# row order as the demographics CSV - confirm.
df999 = SHQ_zscore.loc[female_index_shq, ["zscore"]]
df_entro_fem = entropy.loc[female_index_shq, ["entropy_adjusted"]]
# Side-by-side join on the shared row labels.
entro_per_performance = pandas.concat([df999, df_entro_fem], axis=1, join='inner')
# Scatter plot of SHQ zscore against adjusted entropy, then the correlation matrix.
fig = px.scatter(entro_per_performance, x="zscore", y="entropy_adjusted")
fig.show()
print(entro_per_performance.corr())
zscore entropy_adjusted zscore 1.000000 0.065541 entropy_adjusted 0.065541 1.000000
## Male: education vs performance (SHQ scores)
# SHQ columns removed before the analysis.
shq_level_cols = ['L1', 'L11', 'L32', "L42", "L68"]
# Demographics columns removed before plotting; 'education' is kept.
non_education_cols = ['subID', 'age', 'environment', 'city', 'street', 'still_live',
                      'city_current', 'city_current_type', 'gender',
                      'environment_binary', 'environment_current_binary']

# Row labels of male participants with tertiary / secondary education.
is_male = demographics['gender'] == 'Male'
Male_ter_index = demographics.index[is_male & (demographics['education'] == 'Tertiary')]
male_sec_index = demographics.index[is_male & (demographics['education'] == "Secondary")]

# NOTE(review): the demographics row labels are reused on the SHQ table, which
# assumes both CSVs list participants in the same row order - confirm.
# drop() without inplace returns a new frame, so no selection is mutated in
# place and pandas no longer emits SettingWithCopyWarning.
# SHQ scores of the tertiary and secondary groups.
df6 = SHQ_zscore.loc[Male_ter_index].drop(columns=shq_level_cols)
df7 = SHQ_zscore.loc[male_sec_index].drop(columns=shq_level_cols)
# Demographics rows of the tertiary and secondary groups.
df_edu_male = demographics.loc[Male_ter_index].drop(columns=non_education_cols)
df_edu2_male = demographics.loc[male_sec_index].drop(columns=non_education_cols)

# Join scores and education level per group on the shared row labels.
edu_ter_male = pandas.concat([df6, df_edu_male], axis=1, join='inner')
edu_sec_male = pandas.concat([df7, df_edu2_male], axis=1, join='inner')

# Plot tertiary education against performance.
fig_edu = px.scatter(edu_ter_male, x="zscore", y="education")
fig_edu.show()
# Plot secondary education against performance.
fig_edu2 = px.scatter(edu_sec_male, x="zscore", y="education")
fig_edu2.show()
# Both groups on one matplotlib figure.
plt.scatter(df6['zscore'], df_edu_male['education'])
plt.scatter(df7['zscore'], df_edu2_male['education'])
plt.show()
C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
## Female: education vs performance (SHQ scores)
# SHQ columns removed before the analysis.
shq_level_cols = ['L1', 'L11', 'L32', "L42", "L68"]
# Demographics columns removed before plotting; 'education' is kept.
non_education_cols = ['subID', 'age', 'environment', 'city', 'street', 'still_live',
                      'city_current', 'city_current_type', 'gender',
                      'environment_binary', 'environment_current_binary']

# Row labels of female participants with tertiary / secondary education.
is_female = demographics['gender'] == 'Female'
female_ter_index = demographics.index[is_female & (demographics['education'] == 'Tertiary')]
female_sec_index = demographics.index[is_female & (demographics['education'] == "Secondary")]

# NOTE(review): the demographics row labels are reused on the SHQ table, which
# assumes both CSVs list participants in the same row order - confirm.
# drop() without inplace returns a new frame, so no selection is mutated in
# place and pandas no longer emits SettingWithCopyWarning.
# SHQ scores of the tertiary and secondary groups.
df10 = SHQ_zscore.loc[female_ter_index].drop(columns=shq_level_cols)
df11 = SHQ_zscore.loc[female_sec_index].drop(columns=shq_level_cols)
# Demographics rows of the tertiary and secondary groups.
df_edu_female = demographics.loc[female_ter_index].drop(columns=non_education_cols)
df_edu2_female = demographics.loc[female_sec_index].drop(columns=non_education_cols)

# Join scores and education level per group on the shared row labels.
edu_ter_female = pandas.concat([df10, df_edu_female], axis=1, join='inner')
edu_sec_female = pandas.concat([df11, df_edu2_female], axis=1, join='inner')

# Plot tertiary education against performance.
fig_edu = px.scatter(edu_ter_female, x="zscore", y="education")
fig_edu.show()
# Plot secondary education against performance.
fig_edu2 = px.scatter(edu_sec_female, x="zscore", y="education")
fig_edu2.show()
# Both groups on one matplotlib figure.
plt.scatter(df10['zscore'], df_edu_female['education'])
plt.scatter(df11['zscore'], df_edu2_female['education'])
plt.show()
## Age group and gender vs SHQ zscore (males)
# SHQ columns removed before the analysis.
shq_level_cols = ['L1', 'L11', 'L32', "L42", "L68"]

# Row labels of the male participants in the demographics table.
male_index_age = demographics.index[demographics['gender'] == "Male"].tolist()
# NOTE(review): the demographics row labels are reused on the SHQ table, which
# assumes both CSVs list participants in the same row order - confirm.
# drop() without inplace returns a new frame, so no selection is mutated in
# place and pandas no longer emits SettingWithCopyWarning.
df_age = SHQ_zscore.loc[male_index_age].drop(columns=shq_level_cols)
# Male demographics rows with the listed columns removed; 'age' is kept.
dff_dum = demographics.loc[male_index_age].drop(
    columns=['subID', 'environment', 'city', 'street', 'still_live', 'city_current',
             'city_current_type', 'education', 'gender', 'environment_binary',
             'environment_current_binary'])


def _male_age_band(in_band):
    """Collect the male rows selected by the boolean mask ``in_band``.

    Returns a 4-tuple: the matching demographics rows, their row labels as a
    list, the matching SHQ rows, and the mean of the SHQ ``zscore`` column
    (NaN when the band is empty).
    """
    band_labels = dff_dum.index[in_band].tolist()
    band_scores = SHQ_zscore.loc[band_labels].drop(columns=shq_level_cols)
    return dff_dum[in_band], band_labels, band_scores, band_scores["zscore"].mean()


## Age classification: <20, 20-30, 30-40, 40-50, 50-60, 60 and above.
# Lower bounds are inclusive so that ages of exactly 20, 30, 40, 50 and 60
# land in a band; with strict comparisons on both sides they were in none.
male_age = dff_dum["age"]
df_20, df2_z, df22_z, z_20 = _male_age_band(male_age < 20)
df2_3, df23_z, df23z, z_23 = _male_age_band((male_age >= 20) & (male_age < 30))
df3_4, df34_z, df34z, z_34 = _male_age_band((male_age >= 30) & (male_age < 40))
df4_5, df45_z, df45z, z_45 = _male_age_band((male_age >= 40) & (male_age < 50))
df5_6, df56_z, df56z, z_56 = _male_age_band((male_age >= 50) & (male_age < 60))
df_60, df66_z, df6_z, z_6 = _male_age_band(male_age >= 60)

# Mean zscore per band, in the same order as the labels below.
zscore_age = [z_20, z_23, z_34, z_45, z_56, z_6]
age_group = ['less than 20', '20-30', '30-40', '40-50', '50-60', '60 and above']

# Bar chart of the mean zscore per age band.
fig_age = plt.figure()
# add_axes([1, 1, 1, 1]) anchored the axes at the figure's top-right corner,
# i.e. outside the canvas; a regular subplot keeps the bars inside the figure.
ax = fig_age.add_subplot(111)
ax.bar(age_group, zscore_age)
plt.show()
C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
## Age group and gender vs SHQ zscore (females)
# SHQ columns removed before the analysis.
shq_level_cols = ['L1', 'L11', 'L32', "L42", "L68"]

# Row labels of the female participants in the demographics table.
female_index_age = demographics.index[demographics['gender'] == "Female"].tolist()
# NOTE(review): the demographics row labels are reused on the SHQ table, which
# assumes both CSVs list participants in the same row order - confirm.
# drop() without inplace returns a new frame, so no selection is mutated in
# place and pandas no longer emits SettingWithCopyWarning.
df_agef = SHQ_zscore.loc[female_index_age].drop(columns=shq_level_cols)
# Female demographics rows with the listed columns removed; 'age' is kept.
dff_dumf = demographics.loc[female_index_age].drop(
    columns=['subID', 'environment', 'city', 'street', 'still_live', 'city_current',
             'city_current_type', 'education', 'gender', 'environment_binary',
             'environment_current_binary'])


def _female_age_band(in_band):
    """Collect the female rows selected by the boolean mask ``in_band``.

    Returns a 4-tuple: the matching demographics rows, their row labels as a
    list, the matching SHQ rows, and the mean of the SHQ ``zscore`` column
    (NaN when the band is empty).
    """
    band_labels = dff_dumf.index[in_band].tolist()
    band_scores = SHQ_zscore.loc[band_labels].drop(columns=shq_level_cols)
    return dff_dumf[in_band], band_labels, band_scores, band_scores["zscore"].mean()


## Age classification: <20, 20-30, 30-40, 40-50, 50-60, 60 and above.
# Lower bounds are inclusive so that ages of exactly 20, 30, 40, 50 and 60
# land in a band; with strict comparisons on both sides they were in none.
female_age = dff_dumf["age"]
df_20f, df2_zf, df22_zf, z_20f = _female_age_band(female_age < 20)
df2_3f, df23_zf, df23zf, z_23f = _female_age_band((female_age >= 20) & (female_age < 30))
df3_4f, df34_zf, df34zf, z_34f = _female_age_band((female_age >= 30) & (female_age < 40))
df4_5f, df45_zf, df45zf, z_45f = _female_age_band((female_age >= 40) & (female_age < 50))
df5_6f, df56_zf, df56zf, z_56f = _female_age_band((female_age >= 50) & (female_age < 60))
df_60f, df66_zf, df6_zf, z_6f = _female_age_band(female_age >= 60)

# Mean zscore per band, in the same order as the labels below.
zscore_agef = [z_20f, z_23f, z_34f, z_45f, z_56f, z_6f]
age_groupf = ['less than 20', '20-30', '30-40', '40-50', '50-60', '60 and above']

# Bar chart of the mean zscore per age band.
fig_agef = plt.figure()
# add_axes([1, 1, 1, 1]) anchored the axes at the figure's top-right corner,
# i.e. outside the canvas; a regular subplot keeps the bars inside the figure.
ax = fig_agef.add_subplot(111)
ax.bar(age_groupf, zscore_agef)
plt.show()
C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# Summary statistics (count, mean, std, min, quartiles, max) of the CHQ table.
CHQ_zscore.describe()
| L1 | L2 | L3 | L4 | L5 | zscore | |
|---|---|---|---|---|---|---|
| count | 599.000000 | 599.000000 | 599.000000 | 599.000000 | 599.000000 | 599.000000 |
| mean | 178.907503 | 823.811827 | 1408.538880 | 2011.317523 | 2518.588358 | -0.006363 |
| std | 10.074452 | 238.055430 | 741.900431 | 647.020752 | 1285.917418 | 0.689873 |
| min | 174.118568 | 690.982371 | 880.501090 | 1414.979831 | 104.118801 | -0.706967 |
| 25% | 175.930958 | 724.278912 | 1026.129677 | 1647.406994 | 1651.161803 | -0.455164 |
| 50% | 176.753292 | 750.540917 | 1143.520245 | 1782.555411 | 2101.890172 | -0.207621 |
| 75% | 178.405779 | 804.706204 | 1488.959810 | 2107.618325 | 3009.795591 | 0.200291 |
| max | 319.618046 | 3719.044078 | 9112.265440 | 6124.776388 | 12328.193120 | 4.252410 |
# Male: entropy vs score (CHQ)
# Row labels of the male participants in the demographics table.
male_index = demographics.index[demographics['gender'] == "Male"].tolist()
# NOTE(review): the same labels are applied to the CHQ and entropy tables,
# which assumes all three CSVs list participants in the same row order - confirm.
# drop() without inplace returns a new frame, so the row selection is never
# mutated in place and pandas no longer emits SettingWithCopyWarning.
df14 = CHQ_zscore.loc[male_index].drop(columns=['L1', 'L2', 'L3', "L4", "L5"])
df_entro = entropy.loc[male_index].drop(columns=['address', 'environment'])
# Side-by-side join on the shared row labels.
entro_per = pandas.concat([df14, df_entro], axis=1, join='inner')
# Scatter plot of CHQ zscore against adjusted entropy, then the correlation matrix.
fig = px.scatter(entro_per, x="zscore", y="entropy_adjusted")
fig.show()
print(entro_per.corr())
C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
zscore entropy_adjusted zscore 1.000000 -0.075333 entropy_adjusted -0.075333 1.000000
# Female: entropy vs score (City Hero Quest)
# Row labels of the female participants in the demographics table.
female_index = demographics.index[demographics['gender'] == "Female"].tolist()
# NOTE(review): the same labels are applied to the CHQ and entropy tables,
# which assumes all three CSVs list participants in the same row order - confirm.
# drop() without inplace returns a new frame, so the row selection is never
# mutated in place and pandas no longer emits SettingWithCopyWarning.
df5 = CHQ_zscore.loc[female_index].drop(columns=['L1', 'L2', 'L3', "L4", "L5"])
df_entro_female = entropy.loc[female_index].drop(columns=['address', 'environment'])
# Side-by-side join on the shared row labels.
entro_per_female = pandas.concat([df5, df_entro_female], axis=1, join='inner')
# Scatter plot of CHQ zscore against adjusted entropy.
fig = px.scatter(entro_per_female, x="zscore", y="entropy_adjusted")
fig.show()
# Correlation matrix, printed so this cell reports the same figures as the
# male entropy-vs-CHQ cell.
print(entro_per_female.corr())
C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# Male: education level vs zscore (CHQ scores)
# CHQ columns removed before the analysis.
chq_level_cols = ['L1', 'L2', 'L3', "L4", "L5"]
# Demographics columns removed before plotting; 'education' is kept.
non_education_cols = ['subID', 'age', 'environment', 'city', 'street', 'still_live',
                      'city_current', 'city_current_type', 'gender',
                      'environment_binary', 'environment_current_binary']

# Row labels of male participants with tertiary / secondary education.
is_male = demographics['gender'] == 'Male'
Male_ter_index = demographics.index[is_male & (demographics['education'] == 'Tertiary')]
male_sec_index = demographics.index[is_male & (demographics['education'] == "Secondary")]

# NOTE(review): the demographics row labels are reused on the CHQ table, which
# assumes both CSVs list participants in the same row order - confirm.
# drop() without inplace returns a new frame, so no selection is mutated in
# place and pandas does not emit SettingWithCopyWarning.
# CHQ scores of the tertiary and secondary groups.
df6 = CHQ_zscore.loc[Male_ter_index].drop(columns=chq_level_cols)
df7 = CHQ_zscore.loc[male_sec_index].drop(columns=chq_level_cols)
# Demographics rows of the tertiary and secondary groups.
df_edu_male = demographics.loc[Male_ter_index].drop(columns=non_education_cols)
df_edu2_male = demographics.loc[male_sec_index].drop(columns=non_education_cols)

# Join scores and education level per group on the shared row labels.
edu_ter_male = pandas.concat([df6, df_edu_male], axis=1, join='inner')
edu_sec_male = pandas.concat([df7, df_edu2_male], axis=1, join='inner')

# Plot tertiary education against performance.
fig_edu = px.scatter(edu_ter_male, x="zscore", y="education")
fig_edu.show()
# Plot secondary education against performance.
fig_edu2 = px.scatter(edu_sec_male, x="zscore", y="education")
fig_edu2.show()
# Both groups on one matplotlib figure.
plt.scatter(df6['zscore'], df_edu_male['education'])
plt.scatter(df7['zscore'], df_edu2_male['education'])
plt.show()

# Summary statistics per education level.
print("Tertiary Level education stats:")
print(edu_ter_male.describe())
print("")
print("Secondary Level education stats:")
print(edu_sec_male.describe())
Tertiary Level education stats:
zscore
count 170.000000
mean -0.069230
std 0.735013
min -0.687863
25% -0.485843
50% -0.335396
75% 0.065339
max 3.465768
Secondary Level education stats:
zscore
count 129.000000
mean -0.231491
std 0.401631
min -0.694041
25% -0.533377
50% -0.318836
75% -0.041631
max 1.983105
## Female: education vs performance (City Hero Quest)
# CHQ columns removed before the analysis.
chq_level_cols = ['L1', 'L2', 'L3', "L4", "L5"]
# Demographics columns removed before plotting; 'education' is kept.
non_education_cols = ['subID', 'age', 'environment', 'city', 'street', 'still_live',
                      'city_current', 'city_current_type', 'gender',
                      'environment_binary', 'environment_current_binary']

# Row labels of female participants with tertiary / secondary education.
is_female = demographics['gender'] == 'Female'
female_ter_index = demographics.index[is_female & (demographics['education'] == 'Tertiary')]
female_sec_index = demographics.index[is_female & (demographics['education'] == "Secondary")]

# NOTE(review): the demographics row labels are reused on the CHQ table, which
# assumes both CSVs list participants in the same row order - confirm.
# drop() without inplace returns a new frame, so no selection is mutated in
# place and pandas no longer emits SettingWithCopyWarning.
# CHQ scores of the tertiary and secondary groups.
df10 = CHQ_zscore.loc[female_ter_index].drop(columns=chq_level_cols)
df11 = CHQ_zscore.loc[female_sec_index].drop(columns=chq_level_cols)
# Demographics rows of the tertiary and secondary groups.
df_edu_female = demographics.loc[female_ter_index].drop(columns=non_education_cols)
df_edu2_female = demographics.loc[female_sec_index].drop(columns=non_education_cols)

# Join scores and education level per group on the shared row labels.
edu_ter_female = pandas.concat([df10, df_edu_female], axis=1, join='inner')
edu_sec_female = pandas.concat([df11, df_edu2_female], axis=1, join='inner')

# Plot tertiary education against performance.
fig_edu = px.scatter(edu_ter_female, x="zscore", y="education")
fig_edu.show()
# Plot secondary education against performance.
fig_edu2 = px.scatter(edu_sec_female, x="zscore", y="education")
fig_edu2.show()
# Both groups on one matplotlib figure.
plt.scatter(df10['zscore'], df_edu_female['education'])
plt.scatter(df11['zscore'], df_edu2_female['education'])
plt.show()

# Summary statistics per education level.
print("Tertiary Level education stats:")
print(edu_ter_female.describe())
print("")
print("Secondary Level education stats:")
print(edu_sec_female.describe())
C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
Tertiary Level education stats:
zscore
count 211.000000
mean 0.142371
std 0.718154
min -0.706967
25% -0.353357
50% -0.064839
75% 0.398240
max 3.293040
Secondary Level education stats:
zscore
count 89.000000
mean 0.087411
std 0.766394
min -0.678939
25% -0.388465
50% -0.142678
75% 0.401114
max 4.252410
### CHQ and SHQ performance comparison
# zscore column of each game for every participant.
chq_score = CHQ_zscore[['zscore']]
# rename() without inplace returns a new frame, so the column selection is not
# mutated in place and pandas no longer emits SettingWithCopyWarning.
# The SHQ column is renamed so both zscores can sit in one frame.
shq_score = SHQ_zscore[['zscore']].rename(columns={'zscore': 'shq_z'})
# Side-by-side join on the shared row labels.
total_scores = pandas.concat([chq_score, shq_score], axis=1, join='inner')
# Interactive scatter: CHQ zscore on x, SHQ zscore on y.
fig3 = px.scatter(total_scores, x="zscore", y="shq_z")
fig3.show()
# Static scatter with the axes swapped: SHQ zscore on x, CHQ zscore on y.
plt.scatter(shq_score['shq_z'], chq_score['zscore'], color='red')
plt.show()
print(total_scores.corr())
C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4441: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
zscore shq_z zscore 1.000000 0.442451 shq_z 0.442451 1.000000
# SHQ vs CHQ (male)
# Row labels of the male participants in the demographics table.
male_index2 = demographics.index[demographics['gender'] == "Male"].tolist()
# NOTE(review): the demographics row labels are reused on the SHQ and CHQ
# tables, which assumes all CSVs list participants in the same row order - confirm.
# drop()/rename() without inplace return new frames, so no selection is mutated
# in place and pandas no longer emits SettingWithCopyWarning.
df14 = (
    SHQ_zscore.loc[male_index2]
    .drop(columns=['L1', 'L11', 'L32', "L42", "L68"])
    .rename(columns={'zscore': 'shq_z'})  # keeps the SHQ score distinct from the CHQ 'zscore'
)
df15 = CHQ_zscore.loc[male_index2].drop(columns=['L1', 'L2', 'L3', "L4", "L5"])
# Side-by-side join on the shared row labels.
chq_shq_m = pandas.concat([df14, df15], axis=1, join='inner')
# Scatter plot of CHQ zscore (x) against SHQ zscore (y), then the correlation matrix.
fig5_m = px.scatter(chq_shq_m, x="zscore", y="shq_z")
fig5_m.show()
print(chq_shq_m.corr())
C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4441: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
shq_z zscore shq_z 1.000000 0.523037 zscore 0.523037 1.000000
# SHQ vs CHQ (female)
# Row labels of the female participants in the demographics table.
female_index2 = demographics.index[demographics['gender'] == "Female"].tolist()
# NOTE(review): the demographics row labels are reused on the SHQ and CHQ
# tables, which assumes all CSVs list participants in the same row order - confirm.
# drop()/rename() without inplace return new frames, so no selection is mutated
# in place and pandas no longer emits SettingWithCopyWarning.
df16 = (
    SHQ_zscore.loc[female_index2]
    .drop(columns=['L1', 'L11', 'L32', "L42", "L68"])
    .rename(columns={'zscore': 'shq_z'})  # keeps the SHQ score distinct from the CHQ 'zscore'
)
df17 = CHQ_zscore.loc[female_index2].drop(columns=['L1', 'L2', 'L3', "L4", "L5"])
# Side-by-side join on the shared row labels.
chq_shq_f = pandas.concat([df16, df17], axis=1, join='inner')
# Scatter plot of CHQ zscore (x) against SHQ zscore (y), then the correlation matrix.
fig5_f = px.scatter(chq_shq_f, x="zscore", y="shq_z")
fig5_f.show()
print(chq_shq_f.corr())
C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4441: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
shq_z zscore shq_z 1.000000 0.344522 zscore 0.344522 1.000000
# CHQ vs SHQ for different age groups
# NOTE(review): the row-label lists used below (df2_zf ... df66_zf) are the ones
# built from the female-only demographics subset, so every comparison in this
# cell covers female participants only - confirm that this is intended.


def _paired_scores(row_labels):
    """Return the SHQ frame, the CHQ frame and their join for ``row_labels``.

    The SHQ ``zscore`` column is renamed to ``shq_z`` so it does not clash with
    the CHQ ``zscore`` column in the joined frame. drop()/rename() are used
    without inplace, so the selections are never mutated in place and pandas
    emits no SettingWithCopyWarning.
    """
    shq_part = (
        SHQ_zscore.loc[row_labels]
        .drop(columns=['L1', 'L11', 'L32', "L42", "L68"])
        .rename(columns={'zscore': 'shq_z'})
    )
    chq_part = CHQ_zscore.loc[row_labels].drop(columns=['L1', 'L2', 'L3', "L4", "L5"])
    return shq_part, chq_part, pandas.concat([shq_part, chq_part], axis=1, join='inner')


## 20 and below
df18, df19, chq_shq_2 = _paired_scores(df2_zf)
fig_2 = px.scatter(chq_shq_2, x="zscore", y="shq_z")
fig_2.show()
print(chq_shq_2.corr())

## 20-30
df20, df21, chq_shq_23 = _paired_scores(df23_zf)
fig_23 = px.scatter(chq_shq_23, x="zscore", y="shq_z")
fig_23.show()
print(chq_shq_23.corr())

## 30-40
df22, df23, chq_shq_34 = _paired_scores(df34_zf)
fig_34 = px.scatter(chq_shq_34, x="zscore", y="shq_z")
fig_34.show()
print(chq_shq_34.corr())

## 40-50
df24, df25, chq_shq_45 = _paired_scores(df45_zf)
fig_45 = px.scatter(chq_shq_45, x="zscore", y="shq_z")
fig_45.show()
print(chq_shq_45.corr())

## 50-60
df26, df27, chq_shq_56 = _paired_scores(df56_zf)
fig_56 = px.scatter(chq_shq_56, x="zscore", y="shq_z")
fig_56.show()
print(chq_shq_56.corr())

## 60 and above
df28, df29, chq_shq_6 = _paired_scores(df66_zf)
fig_6 = px.scatter(chq_shq_6, x="zscore", y="shq_z")
fig_6.show()
print(chq_shq_6.corr())
C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4441: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
shq_z zscore shq_z 1.000000 0.107115 zscore 0.107115 1.000000
C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4441: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
shq_z zscore shq_z 1.000000 0.352516 zscore 0.352516 1.000000
C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4441: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
shq_z zscore shq_z 1.000000 0.503348 zscore 0.503348 1.000000
C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4441: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4441: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
shq_z zscore shq_z 1.00000 0.36371 zscore 0.36371 1.00000
C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\Arjun\anaconda3\lib\site-packages\pandas\core\frame.py:4441: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
shq_z zscore shq_z 1.000000 -0.282146 zscore -0.282146 1.000000
shq_z zscore shq_z NaN NaN zscore NaN NaN